From 4d747b04c4fd745f9e4c30397c898d26d065c9f3 Mon Sep 17 00:00:00 2001 From: "maf46@burn.cl.cam.ac.uk" Date: Fri, 25 Feb 2005 14:48:39 +0000 Subject: [PATCH] bitkeeper revision 1.1236.3.3 (421f3ac7eVdbco19D20ncC6UepUAYw) Keep a separate shadow and "hl2" shadow of each guest L2 page. Still doing excessive clearing of these shadows, though... Signed-off-by: michael.fetterman@cl.cam.ac.uk --- xen/arch/x86/domain.c | 4 +- xen/arch/x86/mm.c | 9 +++ xen/arch/x86/shadow.c | 41 +++++++++- xen/arch/x86/vmx.c | 25 +++--- xen/include/asm-x86/shadow.h | 151 ++++++++++++++++++++++------------- xen/include/xen/perfc_defn.h | 1 + 6 files changed, 161 insertions(+), 70 deletions(-) diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 1bffcb6eb3..666cb0a04f 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -410,8 +410,8 @@ static int vmx_final_setup_guest(struct exec_domain *ed, } /* We don't call update_pagetables() as we actively want fields such as - * the linear_pg_table to be null so that we bail out early of - * shadow_fault in case the vmx guest tries illegal accesses with + * the linear_pg_table to be inaccessible so that we bail out early of + * shadow_fault() in case the vmx guest tries illegal accesses with * paging turned off. */ //update_pagetables(ed); /* this assigns shadow_pagetable */ diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 2509910117..80fc421a9a 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -1957,6 +1957,9 @@ int do_update_va_mapping(unsigned long va, * page was not shadowed, or that the L2 entry has not yet been * updated to reflect the shadow. */ + if ( shadow_mode_external(current->domain) ) + BUG(); // can't use linear_l2_table with external tables. + l2_pgentry_t gpde = linear_l2_table[l2_table_offset(va)]; unsigned long gpfn = l2_pgentry_val(gpde) >> PAGE_SHIFT; @@ -2381,6 +2384,9 @@ int ptwr_do_page_fault(unsigned long addr) * Attempt to read the PTE that maps the VA being accessed. By checking for * PDE validity in the L2 we avoid many expensive fixups in __get_user(). */ + if ( shadow_mode_external(current->domain) ) + BUG(); // can't use linear_l2_table with external tables. + if ( !(l2_pgentry_val(linear_l2_table[addr>>L2_PAGETABLE_SHIFT]) & _PAGE_PRESENT) || __get_user(pte, (unsigned long *) @@ -2417,6 +2423,9 @@ int ptwr_do_page_fault(unsigned long addr) * Is the L1 p.t. mapped into the current address space? If so we call it * an ACTIVE p.t., otherwise it is INACTIVE. */ + if ( shadow_mode_external(current->domain) ) + BUG(); // can't use linear_l2_table with external tables. + pl2e = &linear_l2_table[l2_idx]; l2e = l2_pgentry_val(*pl2e); which = PTWR_PT_INACTIVE; diff --git a/xen/arch/x86/shadow.c b/xen/arch/x86/shadow.c index 43157e6d6a..a3a003fdaa 100644 --- a/xen/arch/x86/shadow.c +++ b/xen/arch/x86/shadow.c @@ -111,6 +111,10 @@ static inline int clear_shadow_page( int restart = 0; struct pfn_info *spage = &frame_table[x->smfn_and_flags & PSH_pfn_mask]; + // We don't clear hl2_table's here. At least not yet. + if ( x->pfn & PSH_hl2 ) + return 0; + switch ( spage->u.inuse.type_info & PGT_type_mask ) { /* We clear L2 pages by zeroing the guest entries. */ @@ -486,7 +490,7 @@ unsigned long shadow_l2_table( spfn_info->u.inuse.type_info = PGT_l2_page_table; perfc_incr(shadow_l2_pages); - spfn = spfn_info - frame_table; + spfn = page_to_pfn(spfn_info); /* Mark pfn as being shadowed; update field to point at shadow. */ set_shadow_status(d, gpfn, spfn | PSH_shadowed); @@ -770,6 +774,41 @@ void shadow_l2_normal_pt_update(unsigned long pa, unsigned long gpde) unmap_domain_mem(spl2e); } +unsigned long mk_hl2_table(struct exec_domain *ed) +{ + struct domain *d = ed->domain; + unsigned long gmfn = pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT; + unsigned long gpfn = __mfn_to_gpfn(d, gmfn); + unsigned long hl2mfn, status; + struct pfn_info *hl2_info; + l1_pgentry_t *hl2; + + perfc_incr(hl2_table_pages); + + if ( (hl2_info = alloc_shadow_page(d)) == NULL ) + BUG(); /* XXX Deal gracefully with failure. */ + + hl2_info->u.inuse.type_info = PGT_l1_page_table; + + hl2mfn = page_to_pfn(hl2_info); + status = hl2mfn | PSH_hl2; + set_shadow_status(ed->domain, gpfn | PSH_hl2, status); + + // need to optimize this... + hl2 = map_domain_mem(hl2mfn << PAGE_SHIFT); + memset(hl2, 0, PAGE_SIZE); + unmap_domain_mem(hl2); + + // install this hl2 as the linear_pg_table + if ( shadow_mode_external(d) ) + ed->arch.monitor_vtable[l2_table_offset(LINEAR_PT_VIRT_START)] = + mk_l2_pgentry((hl2mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR); + else + ed->arch.shadow_vtable[l2_table_offset(LINEAR_PT_VIRT_START)] = + mk_l2_pgentry((hl2mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR); + + return status; +} diff --git a/xen/arch/x86/vmx.c b/xen/arch/x86/vmx.c index a14a145521..71133367d2 100644 --- a/xen/arch/x86/vmx.c +++ b/xen/arch/x86/vmx.c @@ -137,10 +137,17 @@ static int vmx_do_page_fault(unsigned long va, struct xen_regs *regs) if (mmio_space(gpa)) handle_mmio(va, gpa); - if ((result = shadow_fault(va, regs))) - return result; - - return 0; /* failed to resolve, i.e raise #PG */ + result = shadow_fault(va, regs); + +#if 0 + if ( !result ) + { + __vmread(GUEST_EIP, &eip); + printk("vmx pgfault to guest va=%p eip=%p\n", va, eip); + } +#endif + + return result; } static void vmx_do_general_protection_fault(struct xen_regs *regs) @@ -273,19 +280,11 @@ static void vmx_vmexit_do_invlpg(unsigned long va) * copying from guest */ shadow_invlpg(ed, va); - index = (va >> L2_PAGETABLE_SHIFT); + index = l2_table_offset(va); ed->arch.hl2_vtable[index] = mk_l2_pgentry(0); /* invalidate pgd cache */ } -static inline void hl2_table_invalidate(struct exec_domain *ed) -{ - /* - * Need to optimize this - */ - memset(ed->arch.hl2_vtable, 0, PAGE_SIZE); -} - static void vmx_io_instruction(struct xen_regs *regs, unsigned long exit_qualification, unsigned long inst_len) { diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h index f29945142a..8018df1e70 100644 --- a/xen/include/asm-x86/shadow.h +++ b/xen/include/asm-x86/shadow.h @@ -9,8 +9,9 @@ #include #include -/* Shadow PT flag bits in pfn_info */ +/* Shadow PT flag bits in shadow_status */ #define PSH_shadowed (1<<31) /* page has a shadow. PFN points to shadow */ +#define PSH_hl2 (1<<30) /* page is an hl2 */ #define PSH_pfn_mask ((1<<21)-1) /* Shadow PT operation mode : shadow-mode variable in arch_domain. */ @@ -44,6 +45,7 @@ extern void unshadow_table(unsigned long gpfn, unsigned int type); extern int shadow_mode_enable(struct domain *p, unsigned int mode); extern void free_shadow_state(struct domain *d); extern void shadow_invlpg(struct exec_domain *, unsigned long); +extern unsigned long mk_hl2_table(struct exec_domain *ed); extern void vmx_shadow_clear_state(struct domain *); @@ -68,7 +70,7 @@ extern unsigned long shadow_l2_table( struct domain *d, unsigned long gmfn); static inline void shadow_invalidate(struct exec_domain *ed) { - if ( !shadow_mode_translate(ed->domain)) + if ( !ed->arch.arch_vmx.flags ) BUG(); memset(ed->arch.shadow_vtable, 0, PAGE_SIZE); } @@ -118,29 +120,27 @@ struct shadow_status { static inline void __shadow_get_l2e( struct exec_domain *ed, unsigned long va, unsigned long *sl2e) { - if ( likely(shadow_mode_enabled(ed->domain)) ) { - if ( shadow_mode_translate(ed->domain) ) - *sl2e = l2_pgentry_val( - ed->arch.shadow_vtable[l2_table_offset(va)]); - else - *sl2e = l2_pgentry_val( - shadow_linear_l2_table[l2_table_offset(va)]); - } - else + if ( !likely(shadow_mode_enabled(ed->domain)) ) BUG(); + + if ( shadow_mode_translate(ed->domain) ) + *sl2e = l2_pgentry_val( + ed->arch.shadow_vtable[l2_table_offset(va)]); + else + *sl2e = l2_pgentry_val( + shadow_linear_l2_table[l2_table_offset(va)]); } static inline void __shadow_set_l2e( struct exec_domain *ed, unsigned long va, unsigned long value) { - if ( likely(shadow_mode_enabled(ed->domain)) ) { - if ( shadow_mode_translate(ed->domain) ) - ed->arch.shadow_vtable[l2_table_offset(va)] = mk_l2_pgentry(value); - else - shadow_linear_l2_table[l2_table_offset(va)] = mk_l2_pgentry(value); - } - else + if ( !likely(shadow_mode_enabled(ed->domain)) ) BUG(); + + if ( shadow_mode_translate(ed->domain) ) + ed->arch.shadow_vtable[l2_table_offset(va)] = mk_l2_pgentry(value); + else + shadow_linear_l2_table[l2_table_offset(va)] = mk_l2_pgentry(value); } static inline void __guest_get_l2e( @@ -347,8 +347,14 @@ static void shadow_audit(struct domain *d, int print) for ( j = 0; j < shadow_ht_buckets; j++ ) { a = &d->arch.shadow_ht[j]; - if ( a->pfn ) { live++; ASSERT(a->smfn_and_flags & PSH_pfn_mask); } - ASSERT(a->pfn < 0x00100000UL); + if ( a->pfn ) + { + live++; + ASSERT(a->smfn_and_flags & PSH_pfn_mask); + } + else + ASSERT(!a->next); + ASSERT( (a->pfn & ~PSH_hl2) < 0x00100000UL); a = a->next; while ( a && (live < 9999) ) { @@ -359,7 +365,7 @@ static void shadow_audit(struct domain *d, int print) live, a->pfn, a->smfn_and_flags, a->next); BUG(); } - ASSERT(a->pfn < 0x00100000UL); + ASSERT( (a->pfn & ~PSH_hl2) < 0x00100000UL); ASSERT(a->smfn_and_flags & PSH_pfn_mask); a = a->next; } @@ -369,15 +375,22 @@ static void shadow_audit(struct domain *d, int print) for ( a = d->arch.shadow_ht_free; a != NULL; a = a->next ) free++; - if ( print) + if ( print ) printk("Xlive=%d free=%d\n",live,free); - abs = (perfc_value(shadow_l1_pages) + perfc_value(shadow_l2_pages)) - live; + // BUG: this only works if there's only a single domain which is + // using shadow tables. + // + abs = ( perfc_value(shadow_l1_pages) + + perfc_value(shadow_l2_pages) + + perfc_value(hl2_table_pages) ) - live; #ifdef PERF_COUNTERS if ( (abs < -1) || (abs > 1) ) { - printk("live=%d free=%d l1=%d l2=%d\n",live,free, - perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages) ); + printk("live=%d free=%d l1=%d l2=%d hl2=%d\n", live, free, + perfc_value(shadow_l1_pages), + perfc_value(shadow_l2_pages), + perfc_value(hl2_table_pages)); BUG(); } #endif @@ -405,6 +418,8 @@ static inline unsigned long __shadow_status( { struct shadow_status *p, *x, *head; + ASSERT(spin_is_locked(&d->arch.shadow_lock)); + x = head = hash_bucket(d, gpfn); p = NULL; @@ -570,7 +585,7 @@ static inline void set_shadow_status( ASSERT(spin_is_locked(&d->arch.shadow_lock)); ASSERT(gpfn != 0); - ASSERT(s & PSH_shadowed); + ASSERT(s & (PSH_shadowed | PSH_hl2)); x = head = hash_bucket(d, gpfn); @@ -658,7 +673,7 @@ static inline unsigned long gva_to_gpte(unsigned long gva) if (!(gpde & _PAGE_PRESENT)) return 0; - index = (gva >> L2_PAGETABLE_SHIFT); + index = l2_table_offset(gva); if (!l2_pgentry_val(ed->arch.hl2_vtable[index])) { pfn = phys_to_machine_mapping(gpde >> PAGE_SHIFT); @@ -684,6 +699,14 @@ static inline unsigned long gva_to_gpa(unsigned long gva) return (gpte & PAGE_MASK) + (gva & ~PAGE_MASK); } +static inline void hl2_table_invalidate(struct exec_domain *ed) +{ + /* + * Need to optimize this + */ + memset(ed->arch.hl2_vtable, 0, PAGE_SIZE); +} + static inline void __update_pagetables(struct exec_domain *ed) { struct domain *d = ed->domain; @@ -698,63 +721,83 @@ static inline void __update_pagetables(struct exec_domain *ed) ed->arch.shadow_table = mk_pagetable(smfn<domain) ) + if ( shadow_mode_translate(d) ) { + l2_pgentry_t *mpl2e = ed->arch.monitor_vtable; l2_pgentry_t *gpl2e, *spl2e; + unsigned long hl2_status, hl2mfn, offset; + int need_flush = 0; if ( ed->arch.guest_vtable ) unmap_domain_mem(ed->arch.guest_vtable); if ( ed->arch.shadow_vtable ) unmap_domain_mem(ed->arch.shadow_vtable); + if ( ed->arch.hl2_vtable ) + unmap_domain_mem(ed->arch.hl2_vtable); gpl2e = ed->arch.guest_vtable = map_domain_mem(pagetable_val(ed->arch.guest_table)); spl2e = ed->arch.shadow_vtable = map_domain_mem(pagetable_val(ed->arch.shadow_table)); - if ( shadow_mode_external(ed->domain ) ) + hl2_status = __shadow_status(d, gpfn | PSH_hl2); + if ( unlikely(!(hl2_status & PSH_hl2)) ) + hl2_status = mk_hl2_table(ed); + + hl2mfn = hl2_status & PSH_pfn_mask; + ed->arch.hl2_vtable = map_domain_mem(hl2mfn << PAGE_SHIFT); + + offset = l2_table_offset(LINEAR_PT_VIRT_START); + if ( hl2mfn != (l2_pgentry_val(mpl2e[offset]) >> PAGE_SHIFT) ) + { + mpl2e[offset] = + mk_l2_pgentry((hl2mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR); + need_flush = 1; + } + + if ( shadow_mode_external(d ) ) { - l2_pgentry_t *mpl2e = ed->arch.monitor_vtable; - unsigned long old_smfn; - unsigned sh_l2offset = l2_table_offset(SH_LINEAR_PT_VIRT_START); - - old_smfn = l2_pgentry_val(mpl2e[sh_l2offset]) >> PAGE_SHIFT; - if ( old_smfn != smfn ) + offset = l2_table_offset(SH_LINEAR_PT_VIRT_START); + if ( smfn != (l2_pgentry_val(mpl2e[offset]) >> PAGE_SHIFT) ) { - mpl2e[sh_l2offset] = + mpl2e[offset] = mk_l2_pgentry((smfn << PAGE_SHIFT) | __PAGE_HYPERVISOR); - local_flush_tlb(); + need_flush = 1; } } if ( ed->arch.arch_vmx.flags ) { // Why is VMX mode doing this? - memset(spl2e, 0, L2_PAGETABLE_ENTRIES * sizeof(l2_pgentry_t)); + shadow_invalidate(ed); + hl2_table_invalidate(ed); } + + if ( need_flush ) + local_flush_tlb(); } } static inline void update_pagetables(struct exec_domain *ed) { - if ( unlikely(shadow_mode_enabled(ed->domain)) ) - { - shadow_lock(ed->domain); - __update_pagetables(ed); - shadow_unlock(ed->domain); - } - if ( !shadow_mode_external(ed->domain) ) - { + if ( unlikely(shadow_mode_enabled(ed->domain)) ) + { + shadow_lock(ed->domain); + __update_pagetables(ed); + shadow_unlock(ed->domain); + } + if ( !shadow_mode_external(ed->domain) ) + { #ifdef __x86_64__ - if ( !(ed->arch.flags & TF_kernel_mode) ) - ed->arch.monitor_table = ed->arch.guest_table_user; - else + if ( !(ed->arch.flags & TF_kernel_mode) ) + ed->arch.monitor_table = ed->arch.guest_table_user; + else #endif - if ( shadow_mode_enabled(ed->domain) ) - ed->arch.monitor_table = ed->arch.shadow_table; - else - ed->arch.monitor_table = ed->arch.guest_table; - } + if ( shadow_mode_enabled(ed->domain) ) + ed->arch.monitor_table = ed->arch.shadow_table; + else + ed->arch.monitor_table = ed->arch.guest_table; + } } #if SHADOW_DEBUG diff --git a/xen/include/xen/perfc_defn.h b/xen/include/xen/perfc_defn.h index af13e71211..a252af7ac7 100644 --- a/xen/include/xen/perfc_defn.h +++ b/xen/include/xen/perfc_defn.h @@ -31,6 +31,7 @@ PERFCOUNTER_CPU( shadow_update_va_fail2, "shadow_update_va_fail2" ) /* STATUS counters do not reset when 'P' is hit */ PERFSTATUS( shadow_l2_pages, "current # shadow L2 pages" ) PERFSTATUS( shadow_l1_pages, "current # shadow L1 pages" ) +PERFSTATUS( hl2_table_pages, "current # hl2 pages" ) PERFCOUNTER_CPU( check_pagetable, "calls to check_pagetable" ) PERFCOUNTER_CPU( check_all_pagetables, "calls to check_all_pagetables" ) -- 2.30.2